###########################################################################
# Simulations to Determine the Accuracy of Endogeneity of Effect Models
# Including Specification Tests (t-tests vs. AIC)
# Dr. Justin Esarey and Dr. Jacqueline H. R. Demeritt
# Models With Unit Effects, Varying N
# May 30, 2011
###########################################################################


# model with unit effects, N=10


rm(list=ls())
set.seed(2092375)
require(gplots)

accuracy.x<-c()
accuracy.x.hi<-c()
accuracy.x.lo<-c()

accuracy.z<-c()
accuracy.z.hi<-c()
accuracy.z.lo<-c()

accuracy.lag<-c()
accuracy.lag.hi<-c()
accuracy.lag.lo<-c()

accuracy.prod<-c()
accuracy.prod.hi<-c()
accuracy.prod.lo<-c()

rmse.lo<-c()
rmse.mid<-c()
rmse.hi<-c()

rmse.misspec.lo<-c()
rmse.misspec.mid<-c()
rmse.misspec.hi<-c()



reps<-1000
n.unit<-10                 # How many units (panels)?



##############################################
# Model WITH unit effects
# N = 10, T = 5
#
##############################################


n.t<-5                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])



  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[1]<-quantile(rmse, probs=0.025)
rmse.mid[1]<-quantile(rmse, probs=0.5)
rmse.hi[1]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[1]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[1]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[1]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[1]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[1]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[1]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[1]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[1]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[1]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[1]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[1]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[1]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[1]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[1]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[1]<-quantile(est[,2]-true[,2], probs=0.975)










##############################################
# Model WITH unit effects
# N = 10, T = 10
#
##############################################


n.t<-10                    # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[2]<-quantile(rmse, probs=0.025)
rmse.mid[2]<-quantile(rmse, probs=0.5)
rmse.hi[2]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[2]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[2]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[2]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[2]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[2]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[2]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[2]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[2]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[2]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[2]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[2]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[2]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[2]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[2]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[2]<-quantile(est[,2]-true[,2], probs=0.975)









##############################################
# Model WITH unit effects
# N = 10, T = 20
#
##############################################


n.t<-20                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[3]<-quantile(rmse, probs=0.025)
rmse.mid[3]<-quantile(rmse, probs=0.5)
rmse.hi[3]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[3]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[3]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[3]<-quantile(rmse.misspec, probs=0.975)



accuracy.x.lo[3]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[3]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[3]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[3]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[3]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[3]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[3]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[3]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[3]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[3]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[3]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[3]<-quantile(est[,2]-true[,2], probs=0.975)







##############################################
# Model WITH unit effects
# N = 10, T = 30
#
##############################################


n.t<-30                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[4]<-quantile(rmse, probs=0.025)
rmse.mid[4]<-quantile(rmse, probs=0.5)
rmse.hi[4]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[4]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[4]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[4]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[4]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[4]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[4]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[4]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[4]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[4]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[4]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[4]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[4]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[4]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[4]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[4]<-quantile(est[,2]-true[,2], probs=0.975)







##############################################
# Model WITH unit effects
# N = 10, T = 40
#
##############################################


n.t<-40                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[5]<-quantile(rmse, probs=0.025)
rmse.mid[5]<-quantile(rmse, probs=0.5)
rmse.hi[5]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[5]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[5]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[5]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[5]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[5]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[5]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[5]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[5]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[5]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[5]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[5]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[5]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[5]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[5]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[5]<-quantile(est[,2]-true[,2], probs=0.975)





##############################################
# Model WITH unit effects
# N = 10, T = 50
#
##############################################


n.t<-50                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[6]<-quantile(rmse, probs=0.025)
rmse.mid[6]<-quantile(rmse, probs=0.5)
rmse.hi[6]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[6]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[6]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[6]<-quantile(rmse.misspec, probs=0.975)

accuracy.x.lo[6]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[6]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[6]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[6]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[6]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[6]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[6]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[6]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[6]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[6]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[6]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[6]<-quantile(est[,2]-true[,2], probs=0.975)





#################################
# Graphical analysis
#################################



# Accuracy of Coefficient Estimates 

par(mfrow=c(2,2), las=2)
n.t<-c(5,10,20,30,40,50)
plot(accuracy.lag~n.t, type="l", ylim=c(min(accuracy.lag.lo), max(accuracy.lag.hi)), main="lag coefficient", xlab="T", ylab="estimate - true value")
lines(accuracy.lag.lo~n.t, lty=2)
lines(accuracy.lag.hi~n.t, lty=2)

plot(accuracy.z~n.t, type="l", ylim=c(min(accuracy.z.lo), max(accuracy.z.hi)), main="z coefficient", xlab="T", ylab="estimate - true value")
lines(accuracy.z.lo~n.t, lty=2)
lines(accuracy.z.hi~n.t, lty=2)

plot(accuracy.x~n.t, type="l", ylim=c(min(accuracy.x.lo), max(accuracy.x.hi)), main="x coefficient", xlab="T", ylab="estimate - true value")
lines(accuracy.x.lo~n.t, lty=2)
lines(accuracy.x.hi~n.t, lty=2)


plot(accuracy.prod~n.t, type="l", ylim=c(min(accuracy.prod.lo), max(accuracy.prod.hi)), main="product term coefficient", xlab="T", ylab="estimate - true value")
lines(accuracy.prod.lo~n.t, lty=2)
lines(accuracy.prod.hi~n.t, lty=2)

dev.copy2eps(file="coef-acc-5-31-2011.eps")




# Accuracy (RMSE) of predictions

par(mfrow=c(1,1), las=1,mar=c(5,5,4,2))
plot(rmse.mid~n.t, type="l", ylim=c(min(c(rmse.mid,rmse.misspec.mid)), max(c(rmse.mid,rmse.misspec.mid))), main=c("Correct vs. Misspecified Model Performance","RMSE for Predicting Dependent Variable"), xlab="T", ylab="Median RMSE from Simulations")
lines(rmse.misspec.mid~n.t, lwd=3)
legend("right", lwd=c(1,3), legend=c("Correct Model", "Misspecified Model"))

dev.copy2eps(file="rmse-CorAndMis-5-31-2011.eps")

n.10.mat<-cbind(rmse.mid,n.t)
write.csv(n.10.mat, file="rmse_n10.csv")

accuracy.lag.10.mat<-cbind(accuracy.lag,n.t)
write.csv(accuracy.lag.10.mat, file="lag_n10.csv")












####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################










# model with unit effects, N=20


rm(list=ls())
require(gplots)

accuracy.x<-c()
accuracy.x.hi<-c()
accuracy.x.lo<-c()

accuracy.z<-c()
accuracy.z.hi<-c()
accuracy.z.lo<-c()

accuracy.lag<-c()
accuracy.lag.hi<-c()
accuracy.lag.lo<-c()

accuracy.prod<-c()
accuracy.prod.hi<-c()
accuracy.prod.lo<-c()

rmse.lo<-c()
rmse.mid<-c()
rmse.hi<-c()

rmse.misspec.lo<-c()
rmse.misspec.mid<-c()
rmse.misspec.hi<-c()



reps<-1000
n.unit<-20                 # How many units (panels)?



##############################################
# Model WITH unit effects
# N = 20, T = 5
#
##############################################


n.t<-5                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])



  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[1]<-quantile(rmse, probs=0.025)
rmse.mid[1]<-quantile(rmse, probs=0.5)
rmse.hi[1]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[1]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[1]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[1]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[1]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[1]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[1]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[1]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[1]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[1]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[1]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[1]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[1]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[1]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[1]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[1]<-quantile(est[,2]-true[,2], probs=0.975)










##############################################
# Model WITH unit effects
# N = 20, T = 10
#
##############################################


n.t<-10                    # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[2]<-quantile(rmse, probs=0.025)
rmse.mid[2]<-quantile(rmse, probs=0.5)
rmse.hi[2]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[2]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[2]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[2]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[2]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[2]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[2]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[2]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[2]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[2]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[2]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[2]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[2]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[2]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[2]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[2]<-quantile(est[,2]-true[,2], probs=0.975)









##############################################
# Model WITH unit effects
# N = 20, T = 20
#
##############################################


n.t<-20                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[3]<-quantile(rmse, probs=0.025)
rmse.mid[3]<-quantile(rmse, probs=0.5)
rmse.hi[3]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[3]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[3]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[3]<-quantile(rmse.misspec, probs=0.975)



accuracy.x.lo[3]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[3]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[3]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[3]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[3]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[3]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[3]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[3]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[3]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[3]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[3]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[3]<-quantile(est[,2]-true[,2], probs=0.975)







##############################################
# Model WITH unit effects
# N = 20, T = 30
#
##############################################


n.t<-30                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[4]<-quantile(rmse, probs=0.025)
rmse.mid[4]<-quantile(rmse, probs=0.5)
rmse.hi[4]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[4]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[4]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[4]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[4]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[4]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[4]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[4]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[4]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[4]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[4]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[4]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[4]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[4]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[4]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[4]<-quantile(est[,2]-true[,2], probs=0.975)







##############################################
# Model WITH unit effects
# N = 20, T = 40
#
##############################################


n.t<-40                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[5]<-quantile(rmse, probs=0.025)
rmse.mid[5]<-quantile(rmse, probs=0.5)
rmse.hi[5]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[5]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[5]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[5]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[5]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[5]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[5]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[5]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[5]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[5]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[5]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[5]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[5]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[5]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[5]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[5]<-quantile(est[,2]-true[,2], probs=0.975)





##############################################
# Model WITH unit effects
# N = 20, T = 50
#
##############################################


n.t<-50                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[6]<-quantile(rmse, probs=0.025)
rmse.mid[6]<-quantile(rmse, probs=0.5)
rmse.hi[6]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[6]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[6]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[6]<-quantile(rmse.misspec, probs=0.975)

accuracy.x.lo[6]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[6]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[6]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[6]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[6]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[6]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[6]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[6]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[6]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[6]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[6]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[6]<-quantile(est[,2]-true[,2], probs=0.975)

n.t<-c(5,10,20,30,40,50)
n.20.mat<-cbind(rmse.mid,n.t)
write.csv(n.20.mat, file="rmse_n20.csv")

accuracy.lag.20.mat<-cbind(accuracy.lag,n.t)
write.csv(accuracy.lag.20.mat, file="lag_n20.csv")














####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################










# model with unit effects, N=50


rm(list=ls())
require(gplots)

accuracy.x<-c()
accuracy.x.hi<-c()
accuracy.x.lo<-c()

accuracy.z<-c()
accuracy.z.hi<-c()
accuracy.z.lo<-c()

accuracy.lag<-c()
accuracy.lag.hi<-c()
accuracy.lag.lo<-c()

accuracy.prod<-c()
accuracy.prod.hi<-c()
accuracy.prod.lo<-c()

rmse.lo<-c()
rmse.mid<-c()
rmse.hi<-c()

rmse.misspec.lo<-c()
rmse.misspec.mid<-c()
rmse.misspec.hi<-c()



reps<-1000
n.unit<-50                 # How many units (panels)?



##############################################
# Model WITH unit effects
# N = 50, T = 5
#
##############################################


n.t<-5                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])



  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[1]<-quantile(rmse, probs=0.025)
rmse.mid[1]<-quantile(rmse, probs=0.5)
rmse.hi[1]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[1]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[1]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[1]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[1]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[1]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[1]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[1]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[1]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[1]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[1]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[1]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[1]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[1]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[1]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[1]<-quantile(est[,2]-true[,2], probs=0.975)










##############################################
# Model WITH unit effects
# N = 50, T = 10
#
##############################################


n.t<-10                    # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[2]<-quantile(rmse, probs=0.025)
rmse.mid[2]<-quantile(rmse, probs=0.5)
rmse.hi[2]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[2]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[2]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[2]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[2]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[2]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[2]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[2]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[2]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[2]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[2]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[2]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[2]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[2]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[2]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[2]<-quantile(est[,2]-true[,2], probs=0.975)









##############################################
# Model WITH unit effects
# N = 50, T = 20
#
##############################################


n.t<-20                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[3]<-quantile(rmse, probs=0.025)
rmse.mid[3]<-quantile(rmse, probs=0.5)
rmse.hi[3]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[3]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[3]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[3]<-quantile(rmse.misspec, probs=0.975)



accuracy.x.lo[3]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[3]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[3]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[3]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[3]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[3]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[3]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[3]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[3]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[3]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[3]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[3]<-quantile(est[,2]-true[,2], probs=0.975)







##############################################
# Model WITH unit effects
# N = 50, T = 30
#
##############################################


n.t<-30                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[4]<-quantile(rmse, probs=0.025)
rmse.mid[4]<-quantile(rmse, probs=0.5)
rmse.hi[4]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[4]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[4]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[4]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[4]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[4]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[4]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[4]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[4]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[4]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[4]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[4]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[4]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[4]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[4]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[4]<-quantile(est[,2]-true[,2], probs=0.975)







##############################################
# Model WITH unit effects
# N = 50, T = 40
#
##############################################


n.t<-40                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[5]<-quantile(rmse, probs=0.025)
rmse.mid[5]<-quantile(rmse, probs=0.5)
rmse.hi[5]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[5]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[5]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[5]<-quantile(rmse.misspec, probs=0.975)


accuracy.x.lo[5]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[5]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[5]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[5]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[5]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[5]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[5]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[5]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[5]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[5]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[5]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[5]<-quantile(est[,2]-true[,2], probs=0.975)





##############################################
# Model WITH unit effects
# N = 50, T = 50
#
##############################################


n.t<-50                     # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=-0.4, max=0.4)
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating n= ", n.unit, ", t= ", n.t, sep=""), quote=F)  

#store the true and estimated betas, where estimates come from a biased and a correct model
true<-cbind(b.intercept,b.lag,b.x,b.z,b.int)
est<-matrix(data=NA, nrow=reps, ncol=5)
bias<-matrix(data=NA, nrow=reps, ncol=4)

pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)

  est[r,]<-c(0,coefficients(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))[c(1,2,3,14)])
	bias[r,]<-c(0,coefficients(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))[c(1,2,3)])




  if(r==1){rmse<-c()}
  y.correct<-b.intercept[r]+b.lag[r]*dat$l.y+b.x[r]*dat$x+b.z[r]*dat$z+b.int[r]*dat$l.y*dat$x+unit.effect[dat$unit]
  y.pred<-predict(lm(y~l.y*x+z+as.factor(unit)-1, data=dat))
  rmse[r]<-sqrt(mean((y.pred-y.correct)^2))
  if(r==1){rmse.misspec<-c()}
  y.misspec<-predict(lm(y~l.y+x+z+as.factor(unit)-1, data=dat))
  rmse.misspec[r]<-sqrt(mean((y.misspec-y.correct)^2))
  

}

close(pb)

rmse.lo[6]<-quantile(rmse, probs=0.025)
rmse.mid[6]<-quantile(rmse, probs=0.5)
rmse.hi[6]<-quantile(rmse, probs=0.975)

rmse.misspec.lo[6]<-quantile(rmse.misspec, probs=0.025)
rmse.misspec.mid[6]<-quantile(rmse.misspec, probs=0.5)
rmse.misspec.hi[6]<-quantile(rmse.misspec, probs=0.975)

accuracy.x.lo[6]<-quantile(est[,3]-true[,3], probs=0.025)
accuracy.x[6]<-quantile(est[,3]-true[,3], probs=0.5)
accuracy.x.hi[6]<-quantile(est[,3]-true[,3], probs=0.975)

accuracy.prod.lo[6]<-quantile(est[,5]-true[,5], probs=0.025)
accuracy.prod[6]<-quantile(est[,5]-true[,5], probs=0.5)
accuracy.prod.hi[6]<-quantile(est[,5]-true[,5], probs=0.975)

accuracy.z.lo[6]<-quantile(est[,4]-true[,4], probs=0.025)
accuracy.z[6]<-quantile(est[,4]-true[,4], probs=0.5)
accuracy.z.hi[6]<-quantile(est[,4]-true[,4], probs=0.975)

accuracy.lag.lo[6]<-quantile(est[,2]-true[,2], probs=0.025)
accuracy.lag[6]<-quantile(est[,2]-true[,2], probs=0.5)
accuracy.lag.hi[6]<-quantile(est[,2]-true[,2], probs=0.975)


n.t<-c(5,10,20,30,40,50)
n.50.mat<-cbind(rmse.mid,n.t)
write.csv(n.50.mat, file="rmse_n50.csv")

accuracy.lag.50.mat<-cbind(accuracy.lag,n.t)
write.csv(accuracy.lag.50.mat, file="lag_n50.csv")






####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################



# Assess RMSE prediction accuracy for different N
rm(list=ls())
n.10.mat<-read.csv(file="rmse_n10.csv")
n.20.mat<-read.csv(file="rmse_n20.csv")
n.50.mat<-read.csv(file="rmse_n50.csv")

par(mfrow=c(1,1), las=1,mar=c(5,5,4,2))
plot(n.10.mat[,2]~n.10.mat[,3], type="l", ylim=c(min(n.10.mat[,2],n.20.mat[,2],n.50.mat[,2]), max(n.10.mat[,2],n.20.mat[,2],n.50.mat[,2])), main=c("RMSE for Predicting Dependent Variable", "for Increasing N"), xlab="T", ylab="Median RMSE from Simulations")
lines(n.20.mat[,2]~n.10.mat[,3], lty=2)
lines(n.50.mat[,2]~n.10.mat[,3], lwd=3)
legend("right", lwd=c(1,1,3), lty=c(1,2,1), legend=c("N=10", "N=20", "N=50"))

dev.copy2eps(file="RMSE-N-assessment.eps")

# Assess Lag coefficient accuracy for different N

rm(list=ls())
n.10.mat<-read.csv(file="lag_n10.csv")
n.20.mat<-read.csv(file="lag_n20.csv")
n.50.mat<-read.csv(file="lag_n50.csv")

par(mfrow=c(1,1), las=1,mar=c(5,5,4,2))
plot(n.10.mat[,2]~n.10.mat[,3], type="l", ylim=c(min(n.10.mat[,2],n.20.mat[,2],n.50.mat[,2]), max(0.005,n.10.mat[,2],n.20.mat[,2],n.50.mat[,2])), main="Median Lag Coefficient Accuracy from Simulations, FE Model", xlab="T", ylab="estimate - true value")
lines(n.20.mat[,2]~n.10.mat[,3], lty=2)
lines(n.50.mat[,2]~n.10.mat[,3], lwd=3)
abline(h=0, lty=3)
legend("right", lwd=c(1,1,3), lty=c(1,2,1), legend=c("N=10", "N=20", "N=50"))

dev.copy2eps(file="lag-N-assessment.eps")







####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################
####################################################################################


















###########################################################################
# Simulations to Determine Specification of Causal Endogeneity Models
# June 8, 2011
# Models WITH Unit Effects
###########################################################################

rm(list=ls())
set.seed(325890)
reps<-1000
library(stats4)


##############################################
# Model WITH unit effects
# N = 10, T = 20
#
##############################################


n.unit<-10                 # How many units (panels)?
n.t<-20                    # How many observations per panel (time length)?

# draw the beta coefficients for each model
b.intercept<-runif(reps, min=-3, max=3)
b.x<-runif(reps, min=-2, max=2)
b.int<-runif(reps, min=0.1, max=0.4)*sign(runif(reps, min=-1, max=1))
b.lag<-runif(reps, min=-0.4, max=0.4)
b.z<-runif(reps, min=-2, max=2)

print(paste("Simulating_n",n.unit,"_t",n.t, sep=""), quote=F)  

# store interaction p-value and AIC for model w/ and w/o interaction term
int.p.val<-c()
no.int.p.val<-c()
aic.int<-c()
aic.no.int<-c()
bic.int<-c()
bic.no.int<-c()


pb <- txtProgressBar(min = 0, max = reps, style = 3)

# run the monte carlo simulations
for(r in 1:reps){

	
	setTxtProgressBar(pb, r)
	
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

  # generate data WITH interaction effects
	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + (b.x[r]+b.int[r]*y[length(y)])*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat<-cbind(unit, period, x, z, y, l.y)
	dat<-subset(dat, period>0)
	dat<-as.data.frame(dat)


  # generate data WITHOUT interaction effects
	unit<-c()
	period<-c()
	x<-c()
	z<-c()
	y<-c()
	l.y<-c()
	unit.effect<-runif(n.unit, min=-3, max=3)

	for(i in 1:n.unit){

		for(j in 1:(n.t+1)){

			unit[length(unit)+1]<-i
			period[length(period)+1]<-(j-1)
			x[length(x)+1]<-runif(1, min=-3, max=3)
      z[length(z)+1]<-runif(1, min=-3, max=3)

			if(j!=1){l.y[length(l.y)+1] <- y[length(y)]; y[length(y) + 1 ] <- unit.effect[i] + b.lag[r]*y[length(y)] + b.x[r]*x[length(x)]+b.z[r]*z[length(z)]+rnorm(1, mean=0, sd=3)}
			else{l.y[length(l.y)+1]<-NA; y[length(y)+1]<-unit.effect[i] + runif(1, min=-2, max=2)}

		}

	}

	dat.noint<-cbind(unit, period, x, z, y, l.y)
	dat.noint<-subset(dat.noint, period>0)
	dat.noint<-as.data.frame(dat.noint)

  model<-lm(y~l.y*x+z+as.factor(unit)-1, data=dat)
  model.two<-lm(y~l.y+x+z+as.factor(unit)-1, data=dat)
	int.p.val[r]<-summary(model)$coefficients[14,4]
  aic.int[r]<-AIC(model)<AIC(model.two)
  bic.int[r]<-BIC(model)<BIC(model.two)

  model<-lm(y~l.y*x+z+as.factor(unit)-1, data=dat.noint)
  model.two<-lm(y~l.y+x+z+as.factor(unit)-1, data=dat.noint)
	no.int.p.val[r]<-summary(model)$coefficients[14,4]
  aic.no.int[r]<-AIC(model)<AIC(model.two)
  bic.no.int[r]<-BIC(model)<BIC(model.two)



}

close(pb)

# False = Choose No Product Model, True = Choose Product Model
summary(int.p.val<0.05)
# False = Choose No Product Model, True = Choose Product Model
summary(aic.int)
# False = Choose No Product Model, True = Choose Product Model
summary(bic.int)

# False = Choose No Product Model, True = Choose Product Model
summary(no.int.p.val<0.05)
# False = Choose No Product Model, True = Choose Product Model
summary(aic.no.int)
# False = Choose No Product Model, True = Choose Product Model
summary(bic.no.int)




